/************************************   

DESCRIPTION: This program puts male beneficiaries into bins, or blocks, based
 on their propensity score for estimation of the blocking estimator. The blocks
 are determined by either the size of the block or whether the 'treated' and the 
 'control' group beneficiaries are statistically different from each other within the 
  block.
  
  Datasets used:
   (1) maleLPManalysis2
  
  Datasets created:
   (1) maleAllBlocks
   (2) maleDiabBlocks
   (3) maleHypBlocks


*************************************/  


clear all
  
  cd "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Program\PropensityScore\Patients\ResponsetoReferee\OtherPropScoreMethods\Blocking"
   
set more off
capture log close
clear all

set matsize 10000

global origData "N:\MedicareClaims-P045601-BE"
*Use data from LPM
global dataIn "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Program\PropensityScore\Patients\ResponsetoReferee\OtherPropScoreMethods\LPM"
global dataOut "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out"
global dpath "N:\MedicareClaims-P045601-BE\Work\ay_data"
global dataProp "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out\PropScore\Patients"
global logs "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Logs\PropScore\Patients"
global dpath "N:\MedicareClaims-P045601-BE\Work\ay_data"
global skapath "N:\MedicareClaims-P045601-BE\Work\ska"
global output

adopath +  "N:/SIL-Common/estout"
adopath +  "N:/SIL-Common/outreg2"
adopath +  "N:/SIL-Common/reghdfe-master/package"

log using "./1a.BlockEstimation_Malecreate.log", replace

*Columnames program

	program getColnames 
	 quietly {
	  matrix A = e(b)'
	 
	 local rownames : rowfullnames A
	 local c : word count `rownames'
	 local names : colfullnames A

	 svmat A, names(col)

	 gen rownames = ""
	 forvalues i = 1/`c' {

	  replace rownames = "`:word `i' of `rownames''" in `i' 
	   } 

	 }

	end


	program getRegStats
	 quietly {
	  matrix A = e(b)'
	 
	 local rownames : rowfullnames A
	 local c : word count `rownames'
	 local n = `c' + 1
	 local rsq = `c' + 3

	 replace beta1 =e(N) in `n' 
	 replace beta1 =e(r2_p) in `rsq' 

	 }

	end

*This creates the blocking estimator blocks from the t-stats
*Want it dynamic because we'll keep splitting the blocks until t-stat is < 1.96

	 program blocking
	*Define arguments
	  args centiles cnt
	  *get the centile estimates conditional on treatment for each centile we specify (start with 4)
	  centile EHAT if vmerger==1, centile(`centiles' )
	  *the count of centile estimates we care about (start with 4)
	  local wc = `cnt' -1 
	 capture drop block  

	 *this generates the block - start with the first one outside the loop
	 gen block =1 if  EHAT < r(c_1)
	  forval k = 1(1)`wc' {
	   di "`k'"
	   local kp1 = `k' +1
	   di "`kp1'"
	   if `k' < `wc' {
	   replace block =`kp1' if  r(c_`k') <= EHAT & EHAT < r(c_`kp1')
	   }
	   if `k' >= `wc' {
	   replace block =`kp1' if  r(c_`k') <= EHAT 
	   }
	}
	 *Just a check to see all the observations have a block  
	   tab block, miss
	   
	 *now create the Log odds ratio for calculation of the t within the block
	  levelsof block, local(locblock)
	   capture drop lor
	   gen lor = log(EHAT/(1-EHAT))
	   
	 *construct the t-stat for differences in LOR of treated and untreated groups within each block
	  *first get the relevant statistics for use in the t-stat
	forval vm = 0(1)1 {
	 foreach bk of local locblock {
		di `vm' " " `bk'
	 summ lor if vmerger ==`vm' & block ==`bk' 
	  scalar mn`vm'_`bk' = r(mean)
	  scalar var`vm'_`bk' = r(Var)
	  scalar ct`vm'_`bk' = r(N)
		}
	  }
	  *now calculate the t-stat
	 foreach b of local locblock {
	  scalar t`b' = (mn0_`b' - mn1_`b')/sqrt(var0_`b'/ct0_`b' + var1_`b'/ct1_`b' )
	  di `b'
	  di t`b'
	 }
	 
	 end
 
 

 **************** All *************************
  
 
use $dataIn\maleLPManalysis2.dta
 gen EHAT = ehat
 *Drop these contradictions
 drop if mxhypertension_p_all==1 & mxicd8_chronic_p_all==0
  drop if EHAT ==.
*Diabetes 
  keep BENE_ID EHAT vmerger
*Unique people
  egen maxvmerger = max(vmerger), by(BENE_ID)
  drop vmerger
  rename maxvmerger vmerger
  duplicates drop

*this is just a while variable.  
local wvar = 0 
*this is the primer. Imbens said most people have 5 blocks. We'll have many
local cen 20 40 60 80 

local looper = 0

*Doing while at least one t-stat is > 1.96. Currently just a loop. Change later
 while `wvar' < 1 { 
 local looper =`looper' + 1
 *Set to primer if first iteration 
 if `looper' ==1 {
 local cen `cen' 
 }
 *Set to split groups if relevant
 if `looper' > 1 {
 local cen `newcen'
 }

  local wc : word count `cen' 
  local wc = `wc' +1  

*Effectively drop these locals
  local newcen = .
  forval i=1(1)`wc' {
  local c_a`i' = .
  }

 *THE FOLLOWING LOOP LOOKS TO SEE IF THE BLOCK NEEDS TO BE SPLIT AT MEDIAN
 *Looping over current set of centiles. primer list has 4
   forval i=1(1)`wc' {
   local j = `i' -1
   blocking "`cen'" `wc' 
      
  *THIS gets the current and the previous cutoff values to take "median"
   *There is one more group than cutoff value. Dealing with that issue
   if `i' < `wc'  {
   local temp`i' `: word `i' of `cen''
   }
   *There is one more group than cutoff value. Dealing with that issue
   if `j' >=1  {
   local temp`j' `: word `j' of `cen''
   }

 *Finds the value that splits the first block if t-stat is big
 if `i' ==1 & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' ) / 2 
   di `c_a`i'' 
   }

  *Finds the value that splits all blocks other than first one if t-stat is big
  if `i' >1 & `i'<`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

  *Finds the value that splits the last block if t-stat is big
   if `i'==`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (100 - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

 *Creates the centile groups for construction of block
 *When we don't split groups, the splitter becomes missing, need to remove it use the "not" local to take it out of the block name
 local not .
 local wvar = 1
 *This is a loop for all of the centiles
  forval k=1(1)`wc' {
 *The first centile
    if `k' ==1 {
	local temp`k' `: word `k' of `cen''
     local newcen `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The middle centiles
   if `k' >1 & `k' != `wc' {
	local temp`k' `: word `k' of `cen''
    local newcen `newcen' `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The last centile
    if `k' == `wc' {
    local newcen `newcen' `c_a`k'' 
	 local newcen: list newcen- not
   }
*Get the while variable
   local allblock = 1
   if abs(t`k') > 1.96 & ct1_`k' > 35 { 
     local allblock = 0
    }
	local wvar = `wvar' * `allblock'
   }
    di t`i' " " ct1_`i'
    di `c_a`i''
    di `temp`i'' " " `temp`j'' 
    di "`cen'"
    di "`newcen'"
    di "`wvar'" " " t`i' " " ct1_`i'

   }
  }
  
 
save ./maleAllBlocks.dta, replace
clear 
 
 
 
 
**************** DIABETES *************************
 
 
 
use $dataIn\maleLPManalysis2.dta
 gen EHAT = ehat

 *Drop these contradictions
 drop if mxhypertension_p_all==1 & mxicd8_chronic_p_all==0
  keep if mxdiabetes_p_all ==1
  drop if EHAT ==.
*Diabetes 
  keep BENE_ID EHAT vmerger
*Unique people
  egen maxvmerger = max(vmerger), by(BENE_ID)
  drop vmerger
  rename maxvmerger vmerger
  duplicates drop

*this is just a while variable.  
local wvar = 0 
*this is the primer. Imbens said most people have 5 blocks. We'll have many
local cen 20 40 60 80 

local looper = 0

*Doing while at least one t-stat is > 1.96. Currently just a loop. Change later
 while `wvar' < 1 { 
 local looper =`looper' + 1
 *Set to primer if first iteration 
 if `looper' ==1 {
 local cen `cen' 
 }
 *Set to split groups if relevant
 if `looper' > 1 {
 local cen `newcen'
 }

  local wc : word count `cen' 
  local wc = `wc' +1  

*Effectively drop these locals
  local newcen = .
  forval i=1(1)`wc' {
  local c_a`i' = .
  }

 *THE FOLLOWING LOOP LOOKS TO SEE IF THE BLOCK NEEDS TO BE SPLIT AT MEDIAN
 *Looping over current set of centiles. primer list has 4
   forval i=1(1)`wc' {
   local j = `i' -1
   blocking "`cen'" `wc' 
      
  *THIS gets the current and the previous cutoff values to take "median"
   *There is one more group than cutoff value. Dealing with that issue
   if `i' < `wc'  {
   local temp`i' `: word `i' of `cen''
   }
   *There is one more group than cutoff value. Dealing with that issue
   if `j' >=1  {
   local temp`j' `: word `j' of `cen''
   }

 *Finds the value that splits the first block if t-stat is big
 if `i' ==1 & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' ) / 2 
   di `c_a`i'' 
   }

  *Finds the value that splits all blocks other than first one if t-stat is big
  if `i' >1 & `i'<`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

  *Finds the value that splits the last block if t-stat is big
   if `i'==`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (100 - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

 *Creates the centile groups for construction of block
 *When we don't split a groups, the splitter becomes missing, need to remove it use the "not" local to take it out of the block name
 local not .
 local wvar = 1
 *This is a loop for all of the centiles
  forval k=1(1)`wc' {
 *The first centile
    if `k' ==1 {
	local temp`k' `: word `k' of `cen''
     local newcen `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The middle centiles
   if `k' >1 & `k' != `wc' {
	local temp`k' `: word `k' of `cen''
    local newcen `newcen' `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The last centile
    if `k' == `wc' {
    local newcen `newcen' `c_a`k'' 
	 local newcen: list newcen- not
   }
*Get the while variable
   local allblock = 1
   if abs(t`k') > 1.96 & ct1_`k' > 35 { 
     local allblock = 0
    }
	local wvar = `wvar' * `allblock'
   }
    di t`i' " " ct1_`i'
    di `c_a`i''
    di `temp`i'' " " `temp`j'' 
    di "`cen'"
    di "`newcen'"
    di "`wvar'" " " t`i' " " ct1_`i'

   }
  }
  
 
save ./maleDiabBlocks.dta, replace
clear 
 
 **************** HYPERTENSION *************************
 
 
 
use $dataIn\maleLPManalysis2.dta
 gen EHAT = ehat
 *Drop these contradictions
 drop if mxhypertension_p_all==1 & mxicd8_chronic_p_all==0
*Hypertension
  keep if mxhypertension_p_all ==1
  drop if EHAT ==.
*Unique people
  egen maxvmerger = max(vmerger), by(BENE_ID)
  drop vmerger
  rename maxvmerger vmerger
  duplicates drop
  keep BENE_ID EHAT vmerger
  duplicates drop

*this is just a while variable.  
local wvar = 0 
*this is the primer. Imbens said most people have 5 blocks. We'll have many
local cen 20 40 60 80 

local looper = 0

*Doing while at least one t-stat is > 1.96. Currently just a loop. Change later
 while `wvar' < 1 { 
 local looper =`looper' + 1
 *Set to primer if first iteration 
 if `looper' ==1 {
 local cen `cen' 
 }
 *Set to split groups if relevant
 if `looper' > 1 {
 local cen `newcen'
 }

  local wc : word count `cen' 
  local wc = `wc' +1  

*Effectively drop these locals
  local newcen = .
  forval i=1(1)`wc' {
  local c_a`i' = .
  }

 *THE FOLLOWING LOOP LOOKS TO SEE IF THE BLOCK NEEDS TO BE SPLIT AT MEDIAN
 *Looping over current set of centiles. primer list has 4
   forval i=1(1)`wc' {
   local j = `i' -1
   blocking "`cen'" `wc' 
      
  *THIS gets the current and the previous cutoff values to take "median"
   *There is one more group than there are cutoff values (this is necessarily true if think about it). Dealing with that issue
   if `i' < `wc'  {
   local temp`i' `: word `i' of `cen''
   }
   *There is one more group than cutoff value. Dealing with that issue
   if `j' >=1  {
   local temp`j' `: word `j' of `cen''
   }

 *Finds the value that splits the first block if t-stat is big
 if `i' ==1 & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' ) / 2 
   di `c_a`i'' 
   }

  *Finds the value that splits all blocks other than first and last if t-stat is big
  if `i' >1 & `i'<`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

  *Finds the value that splits the last block if t-stat is big
   if `i'==`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (100 - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

 *Creates the centile groups for construction of block
 *When we don't split a block, the splitter is a missing value and will need to remove it from local name, we use the "not" local to remove that missing value
 local not .
 local wvar = 1
 *This is a loop for all of the centiles
  forval k=1(1)`wc' {
 *The first centile
    if `k' ==1 {
	local temp`k' `: word `k' of `cen''
     local newcen `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The middle centiles
   if `k' >1 & `k' != `wc' {
	local temp`k' `: word `k' of `cen''
    local newcen `newcen' `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The last centile
    if `k' == `wc' {
    local newcen `newcen' `c_a`k'' 
	 local newcen: list newcen- not
   }
*Get the while variable
   local allblock = 1
   if abs(t`k') > 1.96 & ct1_`k' > 35 { 
     local allblock = 0
    }
	local wvar = `wvar' * `allblock'
   }
    di t`i' " " ct1_`i'
    di `c_a`i''
    di `temp`i'' " " `temp`j'' 
    di "`cen'"
    di "`newcen'"
    di "`wvar'" " " t`i' " " ct1_`i'

   }
  }
  
 
save ./maleHypBlocks.dta, replace
clear
 
  